library(tidyverse)
## Warning in system("timedatectl", intern = TRUE): running command 'timedatectl'
## had status 1
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5     ✓ purrr   0.3.4
## ✓ tibble  3.1.4     ✓ dplyr   1.0.7
## ✓ tidyr   1.1.4     ✓ stringr 1.4.0
## ✓ readr   2.0.1     ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
all_counts <- readRDS(file= "Data/pc_count_matrix.rds")
avg_counts <- readRDS(file="Data/avg_pc_count_matrix.rds")
diff_counts <- readRDS(file="Data/diff_pc_count_matrix.rds")
meta <- suppressMessages(readr::read_tsv("Data/complete_meta_data.tsv"))
meta <- meta %>% mutate(tissue_type=replace(tissue_type, tissue_type=="embryonic facial prominence", "facial prominence"))
meta <- meta %>% mutate(tissue_type=replace(tissue_type, tissue_type=="skeletal muscle tissue", "muscle tissue"))
meta <- meta %>% mutate(tissue_type=replace(tissue_type, tissue_type=="skeletal muscle tissue", "muscle tissue"))
tissue_types <- unique(meta %>% pull(tissue_type))
TFs<- c("Ascl1", "Hes1", "Neurod1", "Mecp2", "Mef2c", "Runx1", "Tcf4", "Pax6")

Investigate the most highly expressed gene (sum of all stages) for each tissue type

maxes_list <- list()
for (tissue in tissue_types){
  tissue_subset_ids <- meta %>% filter(tissue_type == tissue) %>% pull(id)
  all_sums <- apply(all_counts[,tissue_subset_ids],1, sum)
  maxes_list[tissue] <- names(all_sums)[which.max(all_sums)]
}
for (name in names(maxes_list)){
  print(paste0("The most highly expressed protein in ", name, " : ", maxes_list[name]))
}
## [1] "The most highly expressed protein in forebrain : Tuba1a"
## [1] "The most highly expressed protein in midbrain : Tuba1a"
## [1] "The most highly expressed protein in hindbrain : Tuba1a"
## [1] "The most highly expressed protein in facial prominence : Hba-a2"
## [1] "The most highly expressed protein in heart : Hba-a2"
## [1] "The most highly expressed protein in limb : Hba-a2"
## [1] "The most highly expressed protein in liver : Hba-a2"
## [1] "The most highly expressed protein in neural tube : Tuba1a"
## [1] "The most highly expressed protein in lung : Sftpc"
## [1] "The most highly expressed protein in stomach : Cym"
## [1] "The most highly expressed protein in intestine : mt-Atp6"
## [1] "The most highly expressed protein in kidney : mt-Atp6"
## [1] "The most highly expressed protein in thymus : Actb"
## [1] "The most highly expressed protein in muscle tissue : Acta1"
## [1] "The most highly expressed protein in spleen : Hba-a2"
## [1] "The most highly expressed protein in urinary bladder : Actg2"
## [1] "The most highly expressed protein in adrenal gland : mt-Atp6"

Same thing as above but excluding the mitochondria

maxes_list <- list()
for (tissue in tissue_types){
  tissue_subset_ids <- meta %>% filter(tissue_type == tissue) %>% pull(id)
  all_sums <- apply(all_counts[,tissue_subset_ids],1, sum)
  all_sums <- all_sums[!sapply(names(all_sums), startsWith, "mt-")]
  maxes_list[tissue] <- names(all_sums)[which.max(all_sums)]
}
for (name in names(maxes_list)){
  print(paste0("The most highly expressed protein in ", name, " : ", maxes_list[name]))
}
## [1] "The most highly expressed protein in forebrain : Tuba1a"
## [1] "The most highly expressed protein in midbrain : Tuba1a"
## [1] "The most highly expressed protein in hindbrain : Tuba1a"
## [1] "The most highly expressed protein in facial prominence : Hba-a2"
## [1] "The most highly expressed protein in heart : Hba-a2"
## [1] "The most highly expressed protein in limb : Hba-a2"
## [1] "The most highly expressed protein in liver : Hba-a2"
## [1] "The most highly expressed protein in neural tube : Tuba1a"
## [1] "The most highly expressed protein in lung : Sftpc"
## [1] "The most highly expressed protein in stomach : Cym"
## [1] "The most highly expressed protein in intestine : Apoa1"
## [1] "The most highly expressed protein in kidney : Hba-a2"
## [1] "The most highly expressed protein in thymus : Actb"
## [1] "The most highly expressed protein in muscle tissue : Acta1"
## [1] "The most highly expressed protein in spleen : Hba-a2"
## [1] "The most highly expressed protein in urinary bladder : Actg2"
## [1] "The most highly expressed protein in adrenal gland : Hba-a2"

Top 5 Universally highly expressed protein across all tissues regardless of stages (without processing)

##Collapse dev_stages
collapsed_matrices <- matrix(data=NA, nrow = nrow(all_counts), ncol = length(tissue_types))
rownames(collapsed_matrices) <- rownames(all_counts)
walker <- 1
for (tissue in tissue_types){
  tissue_subset_ids <- meta %>% filter(tissue_type == tissue) %>% pull(id)
  all_avgs <- apply(all_counts[,tissue_subset_ids],1, mean)
  collapsed_matrices[,walker] <- all_avgs
  walker <- walker + 1
}
all_sums <- apply(collapsed_matrices,1, sum)
for (i in 1:10){
  print(paste0("The highests expressed protein acrossed all tissue in order ",i ,": ", names(sort(all_sums,decreasing=TRUE))[i]))
}
## [1] "The highests expressed protein acrossed all tissue in order 1: Hba-a2"
## [1] "The highests expressed protein acrossed all tissue in order 2: Hbb-bs"
## [1] "The highests expressed protein acrossed all tissue in order 3: mt-Atp6"
## [1] "The highests expressed protein acrossed all tissue in order 4: Hba-a1"
## [1] "The highests expressed protein acrossed all tissue in order 5: Hbb-bt"
## [1] "The highests expressed protein acrossed all tissue in order 6: mt-Atp8"
## [1] "The highests expressed protein acrossed all tissue in order 7: Actb"
## [1] "The highests expressed protein acrossed all tissue in order 8: Ftl1"
## [1] "The highests expressed protein acrossed all tissue in order 9: Ppia"
## [1] "The highests expressed protein acrossed all tissue in order 10: Acta1"

Expression profile of the highest expressed protein across all tissues Hba-a2 Extract only Hba-a2 information

library(ggplot2)
target_protein <- "Hba-a2"
unique_dev_stages <- unique(meta %>% pull(dev_stage))
Hba_matrix <- matrix(data=NA, nrow=length(tissue_types), ncol=3)
colnames(Hba_matrix) <- c("count", "dev_stage", "tissue_type")
Hba_frame <- data.frame(Hba_matrix)
walker <- 1
for (exp_id in names(avg_counts[target_protein,])){
    Hba_frame[walker, "count"] <- avg_counts[target_protein, exp_id]
    Hba_frame[walker, "dev_stage"] <- meta %>% filter(id==exp_id) %>% pull(dev_stage)
    Hba_frame[walker, "tissue_type"] <- meta %>% filter(id==exp_id) %>% pull(tissue_type)
    walker <- walker + 1
}
Hba_frame$count <- log2(Hba_frame$count) 

Plot the extracted information

library(ggrepel)
make_label <- function(row){
  if (row['dev_stage'] == max(Hba_frame %>% filter(tissue_type==row['tissue_type']) %>% pull(dev_stage))){
    #print(max(Hba_frame %>% filter(tissue_type==tissue) %>% pull(dev_stage)))\
     return(as.character(row['tissue_type']))
  }else {

    return(NA_character_)
  }
}
Hba_frame[,"label"] <- Hba_frame %>% apply(1,make_label)
# Hba_frame <- Hba_frame %>%
#   mutate(label = if_else(dev_stage == max(Hba_frame %>% filter()), as.character(tissue_type), NA_character_))
ggplot(Hba_frame, aes(x=dev_stage, y=count, group=tissue_type, colour=tissue_type)) + geom_line() + geom_point() + expand_limits(x= c(4, 10)) +
  # geom_text_repel(
  #   aes(label = gsub("^.*$", " ", label)),
  #   # This will force the correct position of the link's right end.   

  #   segment.curvature = -0.1,
  #   segment.square = TRUE,
  #   segment.color = 'grey',
  #   box.padding = 0.1,
  #   point.padding = 0.6,
  #   nudge_x = 1,
  #   nudge_y = 1,
  #   force = 15,
  #   hjust = 0,
  #   direction = "y",
  #   na.rm = TRUE,
  #   xlim = c(5, 10),
  #   ylim = c(0, 150000),
  # ) +
  geom_text_repel(data = . %>% filter(!is.na(label)),
                  aes(label = paste0("  ", label)),
                  #segment.alpha = 0, ## This will 'hide' the link
                  segment.curvature = -0.1,
                  segment.square = TRUE,
                  # segment.color = 'grey',
                  box.padding = 0.1,
                  point.padding = 0.6,
                  nudge_x = 1,
                  nudge_y = 1,
                 force = 0.5,
                  hjust = 0,
                  direction="y",
                  na.rm = TRUE, 
                  xlim = c(5, 10),
                  ylim = c(10,17),
)                   +
  ylab("counts(log2)") + ggtitle("Expression pattern of Hba-a2 across tissues") + theme(plot.title = element_text(hjust = 0.5))

Top 5 Universally highly expressed protein across all tissues regardless of stages (by percentage in tissues)

##Collapse dev_stages
collapsed_matrices <- matrix(data=NA, nrow = nrow(all_counts), ncol = length(tissue_types))
rownames(collapsed_matrices) <- rownames(all_counts)
walker <- 1
for (tissue in tissue_types){
  tissue_subset_ids <- meta %>% filter(tissue_type == tissue) %>% pull(id)
  all_avgs <- apply(all_counts[,tissue_subset_ids],1, mean)
  percent_counts <- all_avgs/sum(all_avgs)
  collapsed_matrices[,walker] <- percent_counts
  walker <- walker + 1
}
all_sums <- apply(collapsed_matrices,1, sum)
for (i in 1:10){
  print(paste0("The highests expressed protein acrossed all tissue in order ",i ,": ", names(sort(all_sums,decreasing=TRUE))[i]))
}
## [1] "The highests expressed protein acrossed all tissue in order 1: Hba-a2"
## [1] "The highests expressed protein acrossed all tissue in order 2: Hbb-bs"
## [1] "The highests expressed protein acrossed all tissue in order 3: mt-Atp6"
## [1] "The highests expressed protein acrossed all tissue in order 4: Hba-a1"
## [1] "The highests expressed protein acrossed all tissue in order 5: mt-Atp8"
## [1] "The highests expressed protein acrossed all tissue in order 6: Hbb-bt"
## [1] "The highests expressed protein acrossed all tissue in order 7: Actb"
## [1] "The highests expressed protein acrossed all tissue in order 8: Ftl1"
## [1] "The highests expressed protein acrossed all tissue in order 9: Ppia"
## [1] "The highests expressed protein acrossed all tissue in order 10: Acta1"

Top 5 Universally highly expressed protein across all tissues regardless of stages (by ranking)

##Collapse dev_stages
collapsed_matrices <- matrix(data=NA, nrow = nrow(all_counts), ncol = length(tissue_types))
rownames(collapsed_matrices) <- rownames(all_counts)
walker <- 1
for (tissue in tissue_types){
  tissue_subset_ids <- meta %>% filter(tissue_type == tissue) %>% pull(id)
  all_avgs <- apply(all_counts[,tissue_subset_ids],1, mean)
  all_ranking <- rank(all_avgs)
  collapsed_matrices[,walker] <- all_ranking
  walker <- walker + 1
}
all_sums <- apply(collapsed_matrices,1, sum)
for (i in 1:10){
  print(paste0("The highests expressed protein acrossed all tissue in order ",i ,": ", names(sort(all_sums, decreasing = TRUE))[i]))
}
## [1] "The highests expressed protein acrossed all tissue in order 1: mt-Atp6"
## [1] "The highests expressed protein acrossed all tissue in order 2: mt-Atp8"
## [1] "The highests expressed protein acrossed all tissue in order 3: Hba-a2"
## [1] "The highests expressed protein acrossed all tissue in order 4: Actb"
## [1] "The highests expressed protein acrossed all tissue in order 5: Ppia"
## [1] "The highests expressed protein acrossed all tissue in order 6: Ftl1"
## [1] "The highests expressed protein acrossed all tissue in order 7: mt-Nd1"
## [1] "The highests expressed protein acrossed all tissue in order 8: Eef1a1"
## [1] "The highests expressed protein acrossed all tissue in order 9: mt-Cytb"
## [1] "The highests expressed protein acrossed all tissue in order 10: mt-Co1"

expression distribution acrossed different tissues (box plot)

library(cowplot)
all_counts_melted <- data.frame(all_counts) %>% gather("id", "count")
all_counts_melted <- all_counts_melted %>% inner_join((meta %>% select(id, tissue_type, dev_stage)), by="id")
  ggplot(data=all_counts_melted, aes(x=dev_stage, y=log2(count+1))) + geom_boxplot() +  facet_wrap(~ tissue_type, ncol=3) + theme_cowplot(12) + geom_hline(yintercept=median(log2(all_counts+1)), colour="red")

expression distribution acrossed different tissues (histogram)

all_counts_melted <- data.frame(all_counts) %>% gather("id", "count")
all_counts_melted <- all_counts_melted %>% inner_join((meta %>% select(id, tissue_type, dev_stage)), by="id")
  suppressWarnings(ggplot(data=all_counts_melted, aes(x=log2(count+1), fill=dev_stage)) + geom_histogram(binwidth = 0.5) +  facet_wrap(~ tissue_type, ncol=3) + theme_cowplot(12))

expression distribution acrossed different tissues (histogram log transformed)

all_counts_melted <- data.frame(all_counts) %>% gather("id", "count")
all_counts_melted <- all_counts_melted %>% inner_join((meta %>% select(id, tissue_type, dev_stage)), by="id")
  suppressWarnings(ggplot(data=all_counts_melted, aes(x=log2(count+1), y=..density.. ,fill=dev_stage)) + geom_histogram(binwidth = 0.5) +  facet_wrap(~ tissue_type, ncol=3) + theme_cowplot(12))

expression distribution acrossed different tissues (box plot collapsed dev_stages)

all_counts_melted <- data.frame(all_counts) %>% gather("id", "count")
all_counts_melted <- all_counts_melted %>% inner_join((meta %>% select(id, tissue_type, dev_stage)), by="id")
  ggplot(data=all_counts_melted, aes(y=log2(count+1))) + geom_boxplot() +  facet_grid(. ~ tissue_type) + theme_cowplot(12) + theme(
        axis.text.x=element_blank(),
        axis.ticks.x=element_blank()) + xlab("tissue_types") + geom_hline(yintercept=median(log2(all_counts+1)), colour="red")

expression distribution acrossed different tissues (histogram collapsed by dev_stage)

all_counts_melted <- data.frame(all_counts) %>% gather("id", "count")
all_counts_melted <- all_counts_melted %>% inner_join((meta %>% select(id, tissue_type, dev_stage)), by="id")
  suppressWarnings(ggplot(data=all_counts_melted, aes(x=log2(count+1), fill=tissue_type, y=..density..)) + geom_histogram(binwidth = 0.5) + theme_cowplot(12))  

expression distribution acrossed different tissues (freq_plot collapsed by dev_stage)

all_counts_melted <- data.frame(all_counts) %>% gather("id", "count")
all_counts_melted <- all_counts_melted %>% inner_join((meta %>% select(id, tissue_type, dev_stage)), by="id")
  ggplot(data=all_counts_melted, aes(x=log2(count+1), y=..density.., colour=tissue_type)) + geom_freqpoly(binwidth=0.5) + theme_cowplot(12) + scale_x_continuous(limits=c(0, 20))

Mean-variance

library(ggrepel)
all_counts_mean_var <- all_counts
all_counts_mean_var <- cbind(all_counts_mean_var, mean=apply(all_counts,1,mean))
all_counts_mean_var <- cbind(all_counts_mean_var, variance=apply(all_counts,1,var))
ggplot(data=data.frame(all_counts_mean_var), aes(x=log2(mean+1), y=log2(variance+1))) + geom_point() + theme_cowplot(12) + 
  geom_text_repel(data=data.frame(all_counts_mean_var) %>% rownames_to_column(var="rowname") %>% slice_max(variance, n=10), aes(x=log2(mean+1), y=log2(variance), label=rowname, colour="red")) +
  geom_text_repel(data=data.frame(all_counts_mean_var) %>% rownames_to_column(var="rowname") %>% slice_max(mean, n=10), aes(x=log2(mean+1), y=log2(variance), label=rowname, colour="blue")) + scale_colour_discrete(name = "Top elements", labels = c("Variance", "Mean"))

Mean and variance all together but hue by organism type

collapsed_matrices <- data.frame(mean=double(), variance=double(), tissue=character(), row=character())
colnames(collapsed_matrices) <- c("mean", "variance", "tissue")
for (tissue in tissue_types){
  tissue_subset_ids <- meta %>% filter(tissue_type == tissue) %>% pull(id)
  all_means <- apply(all_counts[,tissue_subset_ids],1, mean)
  all_variance <- apply(all_counts[,tissue_subset_ids],1, var)
  temp_matrix <- data.frame(mean=all_means, variance=all_variance, tissue) %>% rownames_to_column("rowname")
  collapsed_matrices <- rbind(collapsed_matrices, temp_matrix)
}
ggplot(data=collapsed_matrices, aes(x=log2(mean+1), y=log2(variance+1), colour=tissue)) + geom_point()+ theme_cowplot(12) + 
  geom_text_repel(data=collapsed_matrices %>% slice_max(variance+mean, n=15), aes(x=log2(mean+1), y=log2(variance+1), label=rowname, colour=tissue)) 

  #geom_text_repel(data=collapsed_matrices %>% rownames_to_column("row") %>% slice_max(mean, n=10), aes(x=log2(mean+1), y=log2(variance+1), label=row, colour=tissue))
  
  #scale_colour_discrete(name = "Top elements", labels = c("Variance", "Mean"))

Variability across tissue types

ggplot(data=collapsed_matrices, aes(x=log2(mean+1), y=log2(variance+1))) + geom_point() +  facet_wrap(~ tissue, ncol=3) + theme_cowplot(12) 

Zooming in on highly epxressed gene (>1025)

ggplot(data=collapsed_matrices  %>% filter(log2(mean+1) >=10), aes(x=log2(mean+1), y=log2(variance+1))) + geom_point() +  facet_wrap(~ tissue, ncol=3) + theme_cowplot(12) 

Distribution of the TFs

TFCounts <- data.frame(all_counts[TFs,])
TFCounts <- TFCounts %>% rownames_to_column("rowname")
TFCounts <- TFCounts %>% gather("id", "counts", -rowname)
TFCounts <- TFCounts %>% left_join((meta %>% select(tissue_type, dev_stage, id)), by="id")

ggplot(data=TFCounts, aes(x=counts)) + geom_histogram(bins = 100) + facet_grid(~  rowname) + theme_cowplot(12)

Distribution of the TFs (Log transformed)

TFCounts <- data.frame(all_counts[TFs,])
TFCounts <- TFCounts %>% rownames_to_column("rowname")
TFCounts <- TFCounts %>% gather("id", "counts", -rowname)
TFCounts <- TFCounts %>% left_join((meta %>% select(tissue_type, dev_stage, id)), by="id")

ggplot(data=TFCounts, aes(x=log2(counts+1))) + geom_histogram(bins = 100) + facet_grid(~  rowname) + theme_cowplot(12)

Distribution of the TFs (Log transformed)

TFCounts <- data.frame(all_counts[TFs,])
TFCounts <- TFCounts %>% rownames_to_column("rowname")
TFCounts <- TFCounts %>% gather("id", "counts", -rowname)
TFCounts <- TFCounts %>% left_join((meta %>% select(tissue_type, dev_stage, id)), by="id")

ggplot(data=collapsed_matrices %>% filter(rowname %in% TFs), aes(x=log2(mean+1), y=log2(variance+1), colour=tissue)) + geom_point()+ theme_cowplot(12) + 
  geom_text_repel(data=collapsed_matrices %>% filter(rowname %in% TFs) %>% slice_max(variance+mean, n=15), aes(x=log2(mean+1), y=log2(variance+1), label=rowname, colour=tissue)) 

Box plots for TFs across tissues

Tcf4Counts <- TFCounts %>% filter(rowname=="Tcf4")

ggplot(data=TFCounts, aes(y=counts, x=tissue_type)) + geom_boxplot() + theme(axis.text.x = element_text(size = 12, angle = 45, hjust = 1)) + facet_wrap(~ rowname) + geom_hline(yintercept=mean(TFCounts$counts), colour="red")

Tcf4 seems to be involved in neuronal differentiation so it makes sense for it to be highly expressed in the brains/neural tube but no so much in liver Some interesting observation, Mecp2 is basically not epxressed at all same as Runx1(except in thymus), and Tcf4 is one of the most active

Expression across dev_stage and tissues

ggplot(data=TFCounts, aes(x=dev_stage, y=counts, colour=rowname)) + geom_point() + theme(axis.text.x = element_text(size = 12, angle = 45, hjust = 1)) + facet_wrap(~ tissue_type) + scale_colour_discrete(name = "Gene")

Expression across dev_stage and tissues (log transformed)

ggplot(data=TFCounts, aes(x=dev_stage, y=log2(counts+1), colour=rowname)) + geom_point() + theme(axis.text.x = element_text(size = 12, angle = 45, hjust = 1)) + facet_wrap(~ tissue_type) + scale_colour_discrete(name = "Gene")

histograms of expresssion distribution by tissues

ggplot(data=TFCounts, aes(x=log2(counts+1), fill=rowname)) + geom_histogram(bins=100) + theme(axis.text.x = element_text(size = 12, angle = 45, hjust = 1)) + facet_wrap(~ tissue_type, ncol=3) + scale_colour_discrete(name = "Gene")

cluster correlation heatmap

library(gplots)
## 
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
## 
##     lowess
library(RColorBrewer)
# Get some nicer colours
mypalette <- brewer.pal(11, "RdYlBu")
# http://colorbrewer2.org/#type=sequential&scheme=BuGn&n=3
morecols <- colorRampPalette(mypalette)
# Set up colour vector for celltype variable
col.cell <- (c("purple","orange")[meta$tissue_type])[1:(length(meta$tissue_type)/2)]
# Plot the 
avg_counts_tissue_name <- avg_counts
colnames(avg_counts_tissue_name) <- sapply(colnames(avg_counts), function(x){temp_row <- meta %>% filter(id==x) 
                                                                             return(paste0(temp_row %>% pull(tissue_type), " ",temp_row %>% pull(dev_stage)))})
heatmap.2(log2(avg_counts_tissue_name+1)[(order(apply(avg_counts,1,var), decreasing = TRUE))[1:50],], 
          col=rev(morecols(50)),
          trace="column", 
          main="Top 50 most variable genes across samples",
          ColSideColors=col.cell,
          scale = "row")

cluster correlation heatmap (only TFs)

library(gplots)
library(RColorBrewer)
# Get some nicer colours
mypalette <- brewer.pal(11, "RdYlBu")
# http://colorbrewer2.org/#type=sequential&scheme=BuGn&n=3
morecols <- colorRampPalette(mypalette)
# Set up colour vector for celltype variable
col.cell <- (c("purple","orange")[meta$tissue_type])[1:(length(meta$tissue_type)/2)]
# Plot the heatmap
avg_counts_tissue_name <- avg_counts
colnames(avg_counts_tissue_name) <- sapply(colnames(avg_counts), function(x){temp_row <- meta %>% filter(id==x) 
                                                                             return(paste0(temp_row %>% pull(tissue_type), " ",temp_row %>% pull(dev_stage)))})
heatmap.2(log2(avg_counts_tissue_name+1)[TFs,], 
          col=rev(morecols(length(TFs))),
          trace="column", 
          main="TF correlation across samples",
          ColSideColors=col.cell,
          scale="row")

Midbrain cluster correlation heatmap (only TFs)

# Set up colour vector for celltype variable
col.cell <- (c("purple","orange")[meta$tissue_type])[1:(length(meta %>% filter(tissue_type=="midbrain") %>% pull(id)))]
# Plot the heatmap
midbrain_id <- meta %>% filter(tissue_type == "midbrain") %>% pull(id)
midbrain_counts <- all_counts[,midbrain_id]
colnames(midbrain_counts) <- sapply(colnames(midbrain_counts), function(x){temp_row <- meta %>% filter(id==x) 
                                                                             return(paste0(temp_row %>% pull(dev_stage)))})
heatmap.2(log2(midbrain_counts+1)[TFs,], 
          col=rev(morecols(length(TFs))),
          trace="column", 
          main="TF correlation across samples",
          ColSideColors=col.cell,
          scale="row")

Hindbrain cluster correlation heatmap (only TFs)

# Set up colour vector for celltype variable
col.cell <- (c("purple","orange")[meta$tissue_type])[1:(length(meta %>% filter(tissue_type=="hindbrain") %>% pull(id)))]
# Plot the heatmap
hindbrain_id <- meta %>% filter(tissue_type == "hindbrain") %>% pull(id)
hindbrain_counts <- all_counts[,hindbrain_id]
colnames(hindbrain_counts) <- sapply(colnames(hindbrain_counts), function(x){temp_row <- meta %>% filter(id==x) 
                                                                             return(paste0(temp_row %>% pull(dev_stage)))})
heatmap.2(log2(hindbrain_counts+1)[TFs,], 
          col=rev(morecols(length(TFs))),
          trace="column", 
          main="TF correlation across samples",
          ColSideColors=col.cell,
          scale="row")

Completely different expression pattern in different tissues

variability (across tissues, within tissues as well) mean-variance is the ranking similar across tissues

##Remove zero variance 
all_counts_drop_var <- (t(all_counts))[, which(apply(t(all_counts),2,var) !=0)]
all_counts.pca <- prcomp(all_counts_drop_var, center = TRUE,scale. = TRUE)
library(ggfortify)
autoplot(all_counts.pca, data=meta, colour="dev_stage") + theme_cowplot(12)